# library(devtools)
# install_github('MacoskoLab/liger')
library(tidyverse)
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.1     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.0.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
library(liger)
library(Rtsne)
library(reshape2)

Attaching package: ‘reshape2’

The following object is masked from ‘package:tidyr’:

    smiths
library(ggrepel)

Testing LIGER for integration of scATAC and scRNA data. Following vignette from (gitHub)[https://macoskolab.github.io/liger/walkthrough_rna_atac.html].

1. Download and load datasets:

ATAC data is preprocessed to gene-level features (no. of reads that overlap gene body and promoter for each gene). Data is downloaded from (here)[https://umich.app.box.com/s/5hkhpou4inrulo570yhc38ay8g3ont5b]

rna_clusts = readRDS("~/10X_data/rna_cluster_assignments.RDS")
atac_clusts = readRDS("~/10X_data/atac_cluster_assignments.RDS")
pbmc.atac <- readRDS('~/10X_data/pbmc.atac.expression.mat.RDS')
pbmc.rna <- readRDS('~/10X_data/pbmc.rna.expression.mat.RDS')

2. Create LIGER object

List of count matrices as input. createLiger converts to sparseMatrices.

pbmc.data = list(atac=pbmc.atac[,names(atac_clusts)], rna=pbmc.rna[,names(rna_clusts)])
int.pbmc <- createLiger(pbmc.data)
[1] "Removing 97 genes not expressing in atac."
[1] "Removing 11048 genes not expressing in rna."

3. Data preprocessing

## Normalize to column total (adj. for sequencing depth)
int.pbmc <- normalize(int.pbmc)
## Select highly variable genes ONLY IN THE RNA DATASET
int.pbmc <- selectGenes(int.pbmc, datasets.use = 2)
## Scale to the SD
int.pbmc <- scaleNotCenter(int.pbmc)

4. Run NMF

int.pbmc <- optimizeALS(int.pbmc, k=20)

  |                                                                                                                           
  |                                                                                                                     |   0%
  |                                                                                                                           
  |=                                                                                                                    |   1%
  |                                                                                                                           
  |==                                                                                                                   |   2%
  |                                                                                                                           
  |====                                                                                                                 |   3%
  |                                                                                                                           
  |=====                                                                                                                |   4%
  |                                                                                                                           
  |======                                                                                                               |   5%
  |                                                                                                                           
  |=======                                                                                                              |   6%
  |                                                                                                                           
  |========                                                                                                             |   7%
  |                                                                                                                           
  |=========                                                                                                            |   8%
  |                                                                                                                           
  |===========                                                                                                          |   9%
  |                                                                                                                           
  |============                                                                                                         |  10%
  |                                                                                                                           
  |=============                                                                                                        |  11%
  |                                                                                                                           
  |==============                                                                                                       |  12%
  |                                                                                                                           
  |===============                                                                                                      |  13%
  |                                                                                                                           
  |================                                                                                                     |  14%
  |                                                                                                                           
  |==================                                                                                                   |  15%
  |                                                                                                                           
  |===================                                                                                                  |  16%
  |                                                                                                                           
  |=====================================================================================================================| 100%
Converged in 3.941199 mins, 16 iterations.
Best results with seed 1.
smp <- sample(1:nrow(int.pbmc@H$atac), size = 1000)
smp_genes <- sample(1:length(int.pbmc@var.genes), size = 100)
int.pbmc@W[,smp_genes] %>% heatmap(Rowv = NA)
Error in int.pbmc@W[, smp_genes] %>% heatmap(Rowv = NA) : 
  could not find function "%>%"

Align factor

Dodgy quantile normalization

# saveRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS")
int.pbmc <- readRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS")
Error in readRDS(int.pbmc, file = "~/10X_data/pbmc.trainedLIGER.RDS") : 
  object 'int.pbmc' not found

int.pbmc@scale.data$atac[,
                         smp_genes] %>%
  melt(varnames=c("cell", "gene")) %>%
  ggplot(aes(value, color=gene)) + geom_histogram() +
  facet_wrap(gene~.)
int.pbmc@scale.data$rna[,
                         smp_genes] %>%
  melt(varnames=c("cell", "gene")) %>%
  ggplot(aes(value, color=gene)) + geom_histogram() +
  facet_wrap(gene~.)

  

Visualize dataset-specific VS common latent factors

Even in dataset specific component there is separation of cell types (Meaningful information that is ATAC specific?)

atac.comp <- int.pbmc@H$atac[smp,] %*% int.pbmc@V$atac
Error: object 'smp' not found
rna.comp <- int.pbmc@H$rna[smp,] %*% int.pbmc@V$rna
rna.common.comp <- int.pbmc@H$rna[smp,] %*% int.pbmc@W
tsne.rna.comp <- Rtsne(rna.comp, pca=F, check_duplicates = F, theta=0.5, perplexity=30)
rownames(tsne.rna.comp$Y) <- rownames(rna.comp)
tsne.rna.common.comp <- Rtsne(rna.common.comp, pca=F, check_duplicates = F, theta=0.5, perplexity=30)
rownames(tsne.rna.common.comp$Y) <- rownames(rna.common.comp)
tsne.rna.comp$Y %>%
  as.tibble(rownames="cell") %>%
  mutate(clust=rna_clusts[cell]) %>%
  ggplot(aes(V1, V2, color=clust)) + 
  geom_point()

tsne.rna.common.comp$Y %>%
  as.tibble(rownames="cell") %>%
  mutate(clust=rna_clusts[cell]) %>%
  ggplot(aes(V1, V2, color=clust)) + 
  geom_point()

Compare weights of dataset specific and common factors

LS0tCnRpdGxlOiAiVGVzdGluZyBMSUdFUiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbGlicmFyeShkZXZ0b29scykKIyBpbnN0YWxsX2dpdGh1YignTWFjb3Nrb0xhYi9saWdlcicpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGxpZ2VyKQpsaWJyYXJ5KFJ0c25lKQpsaWJyYXJ5KHJlc2hhcGUyKQpsaWJyYXJ5KGdncmVwZWwpCmBgYAoKVGVzdGluZyBMSUdFUiBmb3IgaW50ZWdyYXRpb24gb2Ygc2NBVEFDIGFuZCBzY1JOQSBkYXRhLiBGb2xsb3dpbmcgdmlnbmV0dGUgZnJvbSAoZ2l0SHViKVtodHRwczovL21hY29za29sYWIuZ2l0aHViLmlvL2xpZ2VyL3dhbGt0aHJvdWdoX3JuYV9hdGFjLmh0bWxdLgoKIyMjIDEuIERvd25sb2FkIGFuZCBsb2FkIGRhdGFzZXRzOiAKQVRBQyBkYXRhIGlzIHByZXByb2Nlc3NlZCB0byBnZW5lLWxldmVsIGZlYXR1cmVzIChuby4gb2YgcmVhZHMgdGhhdCBvdmVybGFwIGdlbmUgYm9keSBhbmQgcHJvbW90ZXIgZm9yIGVhY2ggZ2VuZSkuIERhdGEgaXMgZG93bmxvYWRlZCBmcm9tIChoZXJlKVtodHRwczovL3VtaWNoLmFwcC5ib3guY29tL3MvNWhraHBvdTRpbnJ1bG81NzB5aGMzOGF5OGczb250NWJdCmBgYHtyfQpybmFfY2x1c3RzID0gcmVhZFJEUygifi8xMFhfZGF0YS9ybmFfY2x1c3Rlcl9hc3NpZ25tZW50cy5SRFMiKQphdGFjX2NsdXN0cyA9IHJlYWRSRFMoIn4vMTBYX2RhdGEvYXRhY19jbHVzdGVyX2Fzc2lnbm1lbnRzLlJEUyIpCnBibWMuYXRhYyA8LSByZWFkUkRTKCd+LzEwWF9kYXRhL3BibWMuYXRhYy5leHByZXNzaW9uLm1hdC5SRFMnKQpwYm1jLnJuYSA8LSByZWFkUkRTKCd+LzEwWF9kYXRhL3BibWMucm5hLmV4cHJlc3Npb24ubWF0LlJEUycpCmBgYAoKIyMjIDIuIENyZWF0ZSBMSUdFUiBvYmplY3QKTGlzdCBvZiBjb3VudCBtYXRyaWNlcyBhcyBpbnB1dC4gYGNyZWF0ZUxpZ2VyYCBjb252ZXJ0cyB0byBzcGFyc2VNYXRyaWNlcy4gCmBgYHtyfQpwYm1jLmRhdGEgPSBsaXN0KGF0YWM9cGJtYy5hdGFjWyxuYW1lcyhhdGFjX2NsdXN0cyldLCBybmE9cGJtYy5ybmFbLG5hbWVzKHJuYV9jbHVzdHMpXSkKaW50LnBibWMgPC0gY3JlYXRlTGlnZXIocGJtYy5kYXRhKQoKYGBgCiMjIyAzLiBEYXRhIHByZXByb2Nlc3NpbmcKYGBge3J9CiMjIE5vcm1hbGl6ZSB0byBjb2x1bW4gdG90YWwgKGFkai4gZm9yIHNlcXVlbmNpbmcgZGVwdGgpCmludC5wYm1jIDwtIG5vcm1hbGl6ZShpbnQucGJtYykKIyMgU2VsZWN0IGhpZ2hseSB2YXJpYWJsZSBnZW5lcyBPTkxZIElOIFRIRSBSTkEgREFUQVNFVAppbnQucGJtYyA8LSBzZWxlY3RHZW5lcyhpbnQucGJtYywgZGF0YXNldHMudXNlID0gMikKIyMgU2NhbGUgdG8gdGhlIFNECmludC5wYm1jIDwtIHNjYWxlTm90Q2VudGVyKGludC5wYm1jKQpgYGAKCiMjIyA0LiBSdW4gTk1GIAoKYGBge3J9CmludC5wYm1jIDwtIG9wdGltaXplQUxTKGludC5wYm1jLCBrPTIwKQpgYGAKYGBge3J9CnNtcCA8LSBzYW1wbGUoMTpucm93KGludC5wYm1jQEgkYXRhYyksIHNpemUgPSAxMDAwKQpzbXBfZ2VuZXMgPC0gc2FtcGxlKDE6bGVuZ3RoKGludC5wYm1jQHZhci5nZW5lcyksIHNpemUgPSAxMDApCgppbnQucGJtY0BXWyxzbXBfZ2VuZXNdICU+JSBoZWF0bWFwKFJvd3YgPSBOQSkKaW50LnBibWNAViRhdGFjWyxzbXBfZ2VuZXNdICU+JSBoZWF0bWFwKFJvd3YgPSBOQSkKYGBgCmBgYHtyfQoKYGBgCgojIyMgQWxpZ24gZmFjdG9yCkRvZGd5IHF1YW50aWxlIG5vcm1hbGl6YXRpb24KYGBge3J9CmludC5wYm1jIDwtIHF1YW50aWxlQWxpZ25TTkYoaW50LnBibWMpCiMgc2F2ZVJEUyhpbnQucGJtYywgZmlsZSA9ICJ+LzEwWF9kYXRhL3BibWMudHJhaW5lZExJR0VSLlJEUyIpCmludC5wYm1jIDwtIHJlYWRSRFMoZmlsZSA9ICJ+LzEwWF9kYXRhL3BibWMudHJhaW5lZExJR0VSLlJEUyIpCmBgYAoKPCEtLSAjIyMgVmlzdWFsaXplICAtLT4KPCEtLSBgYGB7cn0gLS0+CjwhLS0gcnVuTXlUU05FIDwtIGZ1bmN0aW9uKG9iamVjdCwgdXNlLnJhdyA9IEYsIGRpbXMudXNlID0gMTpuY29sKG9iamVjdEBILm5vcm0pLCB1c2UucGNhID0gRiwgLS0+CjwhLS0gICAgICAgICAgICAgICAgICAgICBwZXJwbGV4aXR5ID0gMzAsIHRoZXRhID0gMC41LCBtZXRob2QgPSAnUnRzbmUnLCBmaXRzbmUucGF0aCA9IE5VTEwsIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgICAgcmFuZC5zZWVkID0gNDIpIHsgLS0+CjwhLS0gICBkYXRhLnVzZSA8LSBkby5jYWxsKHJiaW5kLCBvYmplY3RASCkgICAtLT4KPCEtLSAgIHNldC5zZWVkKHJhbmQuc2VlZCkgLS0+CjwhLS0gICBvYmplY3RAdHNuZS5jb29yZHMgPC0gUnRzbmUob2JqZWN0QEgubm9ybVssIGRpbXMudXNlXSwgcGNhID0gdXNlLnBjYSwgY2hlY2tfZHVwbGljYXRlcyA9IEYsIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGhldGEgPSB0aGV0YSwgcGVycGxleGl0eSA9IHBlcnBsZXhpdHkpJFkgLS0+CjwhLS0gICByb3duYW1lcyhvYmplY3RAdHNuZS5jb29yZHMpIDwtIHJvd25hbWVzKGRhdGEudXNlKSAtLT4KPCEtLSAgIHJldHVybihvYmplY3QpIC0tPgo8IS0tIH0gLS0+CgpgYGB7cn0Kc21wX2dlbmVzIDwtIHNhbXBsZSgxOjIwMDAsMTApCmludC5wYm1jQHNjYWxlLmRhdGEkYXRhY1ssCiAgICAgICAgICAgICAgICAgICAgICAgICBzbXBfZ2VuZXNdICU+JQogIG1lbHQodmFybmFtZXM9YygiY2VsbCIsICJnZW5lIikpICU+JQogIGdncGxvdChhZXModmFsdWUsIGNvbG9yPWdlbmUpKSArIGdlb21faGlzdG9ncmFtKCkgKwogIGZhY2V0X3dyYXAoZ2VuZX4uKQoKaW50LnBibWNAc2NhbGUuZGF0YSRybmFbLAogICAgICAgICAgICAgICAgICAgICAgICAgc21wX2dlbmVzXSAlPiUKICBtZWx0KHZhcm5hbWVzPWMoImNlbGwiLCAiZ2VuZSIpKSAlPiUKICBnZ3Bsb3QoYWVzKHZhbHVlLCBjb2xvcj1nZW5lKSkgKyBnZW9tX2hpc3RvZ3JhbSgpICsKICBmYWNldF93cmFwKGdlbmV+LikKICAKYGBgCgoKPCEtLSBgYGAgLS0+CgojIyMgVmlzdWFsaXplIGRhdGFzZXQtc3BlY2lmaWMgVlMgY29tbW9uIGxhdGVudCBmYWN0b3JzCgpFdmVuIGluIGRhdGFzZXQgc3BlY2lmaWMgY29tcG9uZW50IHRoZXJlIGlzIHNlcGFyYXRpb24gb2YgY2VsbCB0eXBlcyAoTWVhbmluZ2Z1bCBpbmZvcm1hdGlvbiB0aGF0IGlzIEFUQUMgc3BlY2lmaWM/KQoKYGBge3J9CgphdGFjLmNvbXAgPC0gaW50LnBibWNASCRhdGFjW3NtcCxdICUqJSBpbnQucGJtY0BWJGF0YWMKYXRhYy5jb21tb24uY29tcCA8LSBpbnQucGJtY0BIJGF0YWNbc21wLF0gJSolIGludC5wYm1jQFcKCnRzbmUuYXRhYy5jb21wIDwtIFJ0c25lKGF0YWMuY29tcCwgcGNhPVQsIGNoZWNrX2R1cGxpY2F0ZXMgPSBGLCB0aGV0YT0wLjUsIHBlcnBsZXhpdHk9MTApCnJvd25hbWVzKHRzbmUuYXRhYy5jb21wJFkpIDwtIHJvd25hbWVzKGF0YWMuY29tcCkKdHNuZS5hdGFjLmNvbW1vbi5jb21wIDwtIFJ0c25lKGF0YWMuY29tbW9uLmNvbXAsIHBjYT1ULCBjaGVja19kdXBsaWNhdGVzID0gRiwgdGhldGE9MC41LCBwZXJwbGV4aXR5PTMwKQpyb3duYW1lcyh0c25lLmF0YWMuY29tbW9uLmNvbXAkWSkgPC0gcm93bmFtZXMoYXRhYy5jb21tb24uY29tcCkKCgp0c25lLmF0YWMuY29tcCRZICU+JQogIGFzLnRpYmJsZShyb3duYW1lcz0iY2VsbCIpICU+JQogIG11dGF0ZShjbHVzdD1hdGFjX2NsdXN0c1tjZWxsXSkgJT4lCiAgZ2dwbG90KGFlcyhWMSwgVjIsIGNvbG9yPWNsdXN0KSkgKyAKICBnZW9tX3BvaW50KCkKCnRzbmUuYXRhYy5jb21tb24uY29tcCRZICU+JQogIGFzLnRpYmJsZShyb3duYW1lcz0iY2VsbCIpICU+JQogIG11dGF0ZShjbHVzdD1hdGFjX2NsdXN0c1tjZWxsXSkgJT4lCiAgZ2dwbG90KGFlcyhWMSwgVjIsIGNvbG9yPWNsdXN0KSkgKyAKICBnZW9tX3BvaW50KCkKCmBgYAoKYGBge3J9CgpybmEuY29tcCA8LSBpbnQucGJtY0BIJHJuYVtzbXAsXSAlKiUgaW50LnBibWNAViRybmEKcm5hLmNvbW1vbi5jb21wIDwtIGludC5wYm1jQEgkcm5hW3NtcCxdICUqJSBpbnQucGJtY0BXCgp0c25lLnJuYS5jb21wIDwtIFJ0c25lKHJuYS5jb21wLCBwY2E9RiwgY2hlY2tfZHVwbGljYXRlcyA9IEYsIHRoZXRhPTAuNSwgcGVycGxleGl0eT0zMCkKcm93bmFtZXModHNuZS5ybmEuY29tcCRZKSA8LSByb3duYW1lcyhybmEuY29tcCkKdHNuZS5ybmEuY29tbW9uLmNvbXAgPC0gUnRzbmUocm5hLmNvbW1vbi5jb21wLCBwY2E9RiwgY2hlY2tfZHVwbGljYXRlcyA9IEYsIHRoZXRhPTAuNSwgcGVycGxleGl0eT0zMCkKcm93bmFtZXModHNuZS5ybmEuY29tbW9uLmNvbXAkWSkgPC0gcm93bmFtZXMocm5hLmNvbW1vbi5jb21wKQoKCnRzbmUucm5hLmNvbXAkWSAlPiUKICBhcy50aWJibGUocm93bmFtZXM9ImNlbGwiKSAlPiUKICBtdXRhdGUoY2x1c3Q9cm5hX2NsdXN0c1tjZWxsXSkgJT4lCiAgZ2dwbG90KGFlcyhWMSwgVjIsIGNvbG9yPWNsdXN0KSkgKyAKICBnZW9tX3BvaW50KCkKCnRzbmUucm5hLmNvbW1vbi5jb21wJFkgJT4lCiAgYXMudGliYmxlKHJvd25hbWVzPSJjZWxsIikgJT4lCiAgbXV0YXRlKGNsdXN0PXJuYV9jbHVzdHNbY2VsbF0pICU+JQogIGdncGxvdChhZXMoVjEsIFYyLCBjb2xvcj1jbHVzdCkpICsgCiAgZ2VvbV9wb2ludCgpCgpgYGAKCiMjIyBDb21wYXJlIHdlaWdodHMgb2YgZGF0YXNldCBzcGVjaWZpYyBhbmQgY29tbW9uIGZhY3RvcnMKYGBge3J9CmludC5wYm1jQFcgJT4lIAogIG1lbHQodmFybmFtZXMgPSBjKCJmYWN0b3IiLCAiZ2VuZSIpKSAlPiUKICBmaWx0ZXIoZmFjdG9yPT0xKSAlPiUKICBtdXRhdGUocmFuaz1yYW5rKHZhbHVlKSkgJT4lCiAgZ2dwbG90KGFlcyhyYW5rLCB2YWx1ZSkpICsgCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3RleHRfcmVwZWwoZGF0YT0uICU+JSBmaWx0ZXIocmFuayA+IDIzMDApLCBhZXMobGFiZWw9Z2VuZSkpCmBgYApgYGB7cn0KVl9hdGFjIDwtIGludC5wYm1jQFYkYXRhYyAKY29sbmFtZXMoVl9hdGFjKSA8LSBjb2xuYW1lcyhpbnQucGJtY0BXKQoKVl9ybmEgPC0gaW50LnBibWNAViRybmEKY29sbmFtZXMoVl9ybmEpIDwtIGNvbG5hbWVzKGludC5wYm1jQFcpCgpWX2F0YWMgJT4lIAogIG1lbHQodmFybmFtZXMgPSBjKCJmYWN0b3IiLCAiZ2VuZSIpKSAlPiUKICBmaWx0ZXIoZmFjdG9yPT0xKSAlPiUKICBtdXRhdGUocmFuaz1yYW5rKHZhbHVlKSkgJT4lCiAgZ2dwbG90KGFlcyhyYW5rLCB2YWx1ZSkpICsgCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3RleHRfcmVwZWwoZGF0YT0uICU+JSBmaWx0ZXIocmFuayA+IDIzMDApLCBhZXMobGFiZWw9Z2VuZSkpICsKICBnZ3RpdGxlKCd0b3AgViBBVEFDJykKClZfcm5hICU+JSAKICBtZWx0KHZhcm5hbWVzID0gYygiZmFjdG9yIiwgImdlbmUiKSkgJT4lCiAgZ3JvdXBfYnkoZmFjdG9yKSAlPiUKICBtdXRhdGUocmFuaz1yYW5rKHZhbHVlKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIGdncGxvdChhZXMocmFuaywgdmFsdWUpKSArIAogIGdlb21fcG9pbnQoKSArCiAgIyBnZW9tX3RleHRfcmVwZWwoZGF0YT0uICU+JSBmaWx0ZXIocmFuayA+IDIzMTApLCBhZXMobGFiZWw9Z2VuZSkpICsKICBmYWNldF93cmFwKGZhY3Rvcn4uKSArCiAgZ2d0aXRsZSgndG9wIFYgQVRBQycpCgpgYGAKYGBge3J9CmdlbmVzLmJjIDwtIHJlYWQudGFibGUoZmlsZSA9ICIuLi9teV9kYXRhL2NlbGxyYW5nZXItYXRhYzExMF9jb3VudF8zMDQzOV9XU1NTODAzODM2MF9HUkNoMzgtMV8xXzAuZ2VuZXNfYmMuYmVkIiwgc2VwID0gIlx0IiwgYXMuaXMgPSBjKDEpLCBoZWFkZXIgPSBGQUxTRSkKCmdlbmVzLmJjCmBgYAoKCgoKCgoKCgoKCgo=